Read in Analytic Data files
pt.analytic.df <- read.csv(here::here("data", "PupilLightResponse_AnalyticSample_Demog.csv"),
header = T, stringsAsFactors = F)
pt.analytic.df$BMI_c <- pt.analytic.df$BMI - round(mean(pt.analytic.df$BMI), 2)
right_0.post.w <- read.csv(here::here("data", "PupilLightResponse_RightEye_Post_Wide.csv"),
header = T, stringsAsFactors = F)
rownames(right_0.post.w) <- right_0.post.w$subject_id
# right_0.post <- read.csv(here::here("data", "PupilLightResponse_RightEye_Post_Long.csv"),
# header = T, stringsAsFactors = F)
pctChg_vars <- names(right_0.post.w)[grepl("percent_change", names(right_0.post.w))]
sofr_impute_df <- right_0.post.w[, pctChg_vars]
rownames(sofr_impute_df) <- rownames(right_0.post.w)
sofr_fpca <- fpca.face(as.matrix(sofr_impute_df))
sofr_fpca2 <- sofr_fpca$Yhat
names(sofr_fpca2) <- names(sofr_impute_df)
## Adding in fpca imputed data where missing
sofr_imputed <- sofr_impute_df
sofr_imputed[is.na(sofr_imputed)] <- sofr_fpca2[is.na(sofr_imputed)]
ncols = ncol(sofr_imputed)
pt.analytic.df$female <- ifelse(pt.analytic.df$demo_gender == "Female", 1, 0)
smk.df <- pt.analytic.df[, c("subject_id", "user_cat", "female", "BMI_c")]
smk.df$smoker <- ifelse(pt.analytic.df$user_cat %in% c("daily", "occasional"), 1, 0)
sind <- seq(0, 1, len = ncols)
smat <- matrix(sind, nrow(smk.df), ncols, byrow = T)
smk.df$Zraw <- I(as.matrix(sofr_imputed))
smk.df$smat <- I(smat)
smk.df$lmat <- I(matrix(1/ncols, nrow(smk.df), ncols))
smk.df$zlmat <- I(smk.df$lmat * smk.df$Zraw)
smk_fglm_ps2 <- gam(smoker ~ s(smat, by=zlmat, bs = "cr", k = 30), data=smk.df,
method = "REML", family = binomial)
summary(smk_fglm_ps2)
##
## Family: binomial
## Link function: logit
##
## Formula:
## smoker ~ s(smat, by = zlmat, bs = "cr", k = 30)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.3731 0.5711 2.404 0.0162 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(smat):zlmat 4.79 5.463 10.78 0.105
##
## R-sq.(adj) = 0.117 Deviance explained = 13.1%
## -REML = 52.945 Scale est. = 1 n = 84
# original model: smk_fglm_ps2
# Sex and gender added as scalar features
smk_fglm_cv <- gam(smoker ~ female + BMI_c +
s(smat, by=zlmat, bs = "cr", k = 30),
data=smk.df,
method = "REML", family = binomial)
summary(smk_fglm_cv)
##
## Family: binomial
## Link function: logit
##
## Formula:
## smoker ~ female + BMI_c + s(smat, by = zlmat, bs = "cr", k = 30)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.88883 0.65476 2.885 0.00392 **
## female -0.90047 0.50073 -1.798 0.07213 .
## BMI_c 0.05727 0.05846 0.980 0.32730
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(smat):zlmat 3.082 3.465 6.182 0.158
##
## R-sq.(adj) = 0.0812 Deviance explained = 10.6%
## -REML = 52.086 Scale est. = 1 n = 84
anova(smk_fglm_cv, smk_fglm_ps2, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: smoker ~ female + BMI_c + s(smat, by = zlmat, bs = "cr", k = 30)
## Model 2: smoker ~ s(smat, by = zlmat, bs = "cr", k = 30)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 77.152 96.796
## 2 76.864 94.127 0.28818 2.6686 0.02181 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
df_sofr <- data.frame("smat" = seq(0, 1, length.out = ncol(smk.df$smat)),
"zlmat" = 1)
sofr_pred <- predict(smk_fglm_ps2, newdata = df_sofr, se.fit = TRUE, type = "iterms")
# sofr_pred2 <- predict(smk_fglm_ps2, newdata = df_sofr, se.fit = TRUE, type = "terms")
plot.sofr <- data.frame("frame" = 0:(ncol(smk.df$smat) - 1),
"f0" = sofr_pred$fit[, 1],
'f0.se' = sofr_pred$se.fit[, 1])
plot.sofr$lci <- exp(plot.sofr$f0 - 1.96*plot.sofr$f0.se)
plot.sofr$uci <- exp(plot.sofr$f0 + 1.96*plot.sofr$f0.se)
plot.sofr$OR <- exp(plot.sofr$f0)
plot.sofr$sec <- plot.sofr$frame/30
plot.sofr$p <- exp(plot.sofr$f0)/(1+exp(plot.sofr$f0))
sigRegion <- function(df, crit.val, imp.var){
time <- imp.var[1]
est <- imp.var[2]
lci <- imp.var[3]
uci <- imp.var[4]
crit.rows <- which((df[, lci] < crit.val & df[, uci] < crit.val) |
(df[, lci] > crit.val & df[, uci] > crit.val))
# print(crit.rows)
group <- vector(mode = "numeric", length = length(crit.rows))
grp.ind <- 1
for(i in 1:length(crit.rows)){
if(i == 1){
group[i] <- grp.ind}
else(if(i > 1){
if(crit.rows[i] - crit.rows[i-1] == 1){
group[i] <- grp.ind
}
else(if(crit.rows[i] - crit.rows[i-1] != 1){
grp.ind <- grp.ind+1
group[i] <- grp.ind
})
})
}
# print(group)
num.grp <- unique(group)
ind.df <- data.frame(group = NA,
start = NA,
end = NA)
for(i in num.grp){
ind.df[i,] <- c(i, min(crit.rows[group == i]), max(crit.rows[group == i]))
}
# print(ind.df)
res.df <- data.frame(int.start = NA,
int.end = NA,
est.time = NA,
est = NA,
lci = NA,
uci = NA)
# print(nrow(ind.df))
for(i in 1:nrow(ind.df)){
sub.df <- df[(ind.df$start[i]):(ind.df$end[i]), ]
# print(sub.df[1, ])
if(sub.df[1, lci] > crit.val & sub.df[1, uci] >crit.val){
start1 <- round(sub.df[1, time], 2)
end1 <- round(sub.df[nrow(sub.df), time], 2)
or1 <- max(sub.df[ , est])
peaktime1 <- round(sub.df[sub.df[, est] == or1, time], 2)
lci1 <- round(sub.df[sub.df[, est] == or1, lci], 2)
uci1 <- round(sub.df[sub.df[, est] == or1, uci], 2)
or1 <- round(or1, 2)
res.df[i, ] <- c(start1, end1, peaktime1, or1, lci1, uci1)
# print(res.df)
}
if(sub.df[1, lci] < crit.val & sub.df[1, uci] < crit.val){
start2 <- round(sub.df[1, time], 2)
end2 <- round(sub.df[nrow(sub.df), time], 2)
or2 <- min(sub.df[, est])
peaktime2 <- round(sub.df[sub.df[, est] == or2, time], 2)
lci2 <- round(sub.df[sub.df[, est] == or2, lci], 2)
uci2 <- round(sub.df[sub.df[, est] == or2, uci], 2)
or2 <- round(or2, 2)
res.df[i, ] <- c(start2, end2, peaktime2, or2, lci2, uci2)
# print(res.df)
}
}
# crit.ind <- by(crit.rows, INDICES = list(group), FUN = function(x) c(min(x), max(x)))
return(res.df)
}
sofr.sigRegions <- sigRegion(plot.sofr, 1, imp.var = c("sec", "OR", "lci", "uci"))
sofr_plot <- ggplot(data = plot.sofr, aes(x=frame/30, y = exp(f0)))+
geom_line()+
geom_line(aes(x=frame/30, y = exp(f0-1.96*f0.se)), linetype = 'longdash')+
geom_line(aes(x=frame/30, y = exp(f0+1.96*f0.se)), linetype = 'longdash')+
geom_segment(data=sofr.sigRegions, aes(x = int.start, y=1, xend= int.end, yend = 1),
color = "darkred", linewidth =1.2)+
# geom_hline(yintercept = 1, color = "darkred", linetype = 1) +
scale_x_continuous(expand = c(0, 0), limits = c(0,10),
breaks = c(0,2,4,6,8,10))+
scale_y_continuous(limits = c(0,8),
breaks = c(0,1,2,3,4,6,8)) +
labs(title = "",
x = "Seconds after start of light test",
y = "Odds ratio between smokers and non-smokers")+
theme_bw()
sofr_plot
df_sofr_sex <- data.frame("smat" = seq(0, 1, length.out = ncol(smk.df$smat)),
"zlmat" = 1, female = 0, BMI_c = 0)
sofr_pred_sex<- predict(smk_fglm_cv, newdata = df_sofr_sex, se.fit = TRUE, type = "iterms")
df_sofr <- data.frame("smat" = seq(0, 1, length.out = ncol(smk.df$smat)),
"zlmat" = 1)
sofr_pred<- predict(smk_fglm_ps2, newdata = df_sofr, se.fit = TRUE, type = "iterms")
plot.sofr_sex <- data.frame("frame" = 0:(ncol(smk.df$smat) - 1),
"model_demog" = sofr_pred_sex$fit[,3],
"model_demog_se" = sofr_pred_sex$se.fit[,3],
"model_og" = sofr_pred$fit[,1],
"model_og_se" = sofr_pred$se.fit[,1])
sofr_plot_sex <- ggplot(data = plot.sofr_sex, aes(x=frame/30, y = exp(model_og)))+
geom_line()+
geom_line(aes(x=frame/30, y = exp(model_og-1.96*model_og_se)), linetype = 'longdash')+
geom_line(aes(x=frame/30, y = exp(model_og+1.96*model_og_se)), linetype = 'longdash')+
geom_line(aes(x = frame/30, y = exp(model_demog)), col = 'red')+
geom_line(aes(x=frame/30, y = exp(model_demog-1.96*model_demog_se)), linetype = 'longdash', col = "red")+
geom_line(aes(x=frame/30, y = exp(model_demog+1.96*model_demog_se)), linetype = 'longdash', col = "red")+
scale_x_continuous(expand = c(0, 0), limits = c(0,10),
breaks = c(0,2,4,6,8,10))+
scale_y_continuous(limits = c(0,8),
breaks = c(0,1,2,3,4,6,8)) +
labs(title = "",
x = "Seconds after start of light test",
y = "Odds ratio between smokers and non-smokers")+
theme_bw()
sofr_plot_sex
Red line is
the model with the demographic variables. Black line is the original
model.